Промоделировать траекторию марковской цепи при помощи алгоритма Метрополиса-Гастингса с подходящим пробным распределением (proposal distribution).
Исследовать траекторию цепи на сходимость (выход на стационарность, автокорреляцию). При необходимости убрать burn-in интервал и провести прореживание.
Везде убедиться в том, что получена выборка именно из нужного распределения.
Исходная работа Гастингса 1970 г. была посвящена моделированию стандартного нормального распределения с использованием в качестве пробного распределения случайного блуждания с отдельными шагами, полученными из равномерного распределения на отрезке (−δ, δ), δ > 0.
# One Metropolis-Hastings update applied element-wise to a vector of
# independent chains.
#
# Arguments:
#   x     - current states of the chains (numeric vector).
#   dP    - density of the target distribution (default: standard normal).
#   rQ    - sampler of the proposal increment; called as rQ(n) and must
#           return n draws (default: Uniform(-delta, delta) random walk).
#   dQ    - density of the proposal increment; used for the Hastings
#           correction (cancels when the proposal is symmetric).
#   delta - half-width of the default uniform proposal.
#
# Side effects (globals, expected to be initialised by mcmc()):
#   mu            - per-chain running mean of dP(x), appended row-wise.
#   count_all     - total number of proposals made.
#   count_success - total number of accepted proposals.
#
# Returns the updated state vector.
step <- function(
x, dP = dnorm,
rQ = function(x) runif(x, -delta, delta),
dQ = function(x) dunif(x, -delta, delta),
delta = 1) {
# Propose y = x + eps, eps ~ Q, one independent increment per chain.
y <- x + rQ(length(x))
# Accept with probability min(1, p(y)/p(x) * q(x|y)/q(y|x)); here
# q(x|y) = dQ(x - y) and q(y|x) = dQ(y - x). For a symmetric Q the
# dQ ratio is identically 1 (plain Metropolis rule).
a <- pmin(1, dP(y) / dP(x) * dQ(x - y) / dQ(y - x)) >= runif(length(x))
x[a] <- y[a]
n <- dim(mu)[1]
if (n == 0) {
mu <<- dP(x)
} else {
# Incremental running mean: mu_{n+1} = (n*mu_n + dP(x)) / (n + 1).
mu <<- rbind(mu, 1 / (n + 1) * (tail(mu, 1) * n + dP(x)))
}
count_all <<- count_all + length(a)
count_success <<- count_success + sum(a)
return(x)
}
# Run N independent Metropolis-Hastings chains for `iterations` steps,
# report the acceptance rate, optionally draw diagnostic plots, and test
# whether the thinned post-burn-in samples look standard normal.
#
# Arguments:
#   N          - number of parallel chains, all started at 0.
#   iterations - number of MH steps per chain.
#   burn.in    - index of the first iteration kept for the tests.
#   thinning   - keep every thinning-th iteration after burn-in.
#   ...        - forwarded to step() (dP, rQ, dQ, delta).
#   plots      - if TRUE, draw trajectory / running-mean / autocorrelation
#                plots; requires ggplot2 and reshape2 (melt) to be loaded.
#
# Side effects: (re)initialises the globals mc, mu, count_all and
# count_success consumed by step(). Prints the acceptance rate and a
# Kolmogorov-Smirnov test: if each chain's sample really is standard
# normal, the per-chain Shapiro-Wilk p-values should be Uniform(0, 1).
mcmc <- function(N = 100, iterations = 500, burn.in = 25, thinning = 10, ..., plots = FALSE) {
# N <- 100
# iterations <- 100
count_all <<- 0
count_success <<- 0
# mc: (iterations + 1) x N matrix of states; row 1 is the all-zero start.
mc <<- array(rep(0, N), dim = c(1, N))
mu <<- array(dim = c(0, N))
for (i in 1:iterations) {
mc <<- rbind(mc, step(tail(mc, 1), ...))
}
dimnames(mc)[[1]] <<- 1:dim(mc)[1]
dimnames(mu)[[1]] <<- 1:dim(mu)[1]
# hist(tail(mc, 1))
# shapiro.test(tail(mc, 1))
print("Rate of acceptance of changes")
print(count_success / count_all)
if (plots) {
plot(ggplot(melt(mc)) +
geom_line(aes(x = Var1, y = value, group = Var2)) +
labs(title = "x_n Trajectories"))
plot(ggplot(melt(mu)) +
geom_line(aes(x = Var1, y = value, group = Var2)) +
labs(title = "mu_n Trajectories"))
}
# Per-chain autocorrelation function up to lag iterations/2.
autocor <- apply(mc, 2, function(x) acf(x, lag.max = iterations / 2, pl = FALSE)$acf[, 1, 1])
if (plots) {
acf(mc[, 1], lag.max = iterations / 2)
plot(ggplot() +
geom_line(data = melt(autocor), aes(x = Var1, y = value, group = Var2)) +
# geom_smooth(data = melt(autocor), aes(x = Var1, y = value)) +
geom_abline(intercept = 0, slope = 0, color = "blue") +
# Red curve: autocorrelation averaged across the N chains.
geom_line(aes(x = 1:dim(autocor)[1], y = apply(autocor, 1, mean)), color = "red", size = 2) +
labs(title = "Autocorrelation", x = "lag", y = "autocorrelation"))
}
# plot(hist(tail(mc, 1)))
# shapiro.test(tail(mc, 1))
# plot(hist(mc[ seq(100, 1000, 2) ,1]))
# Shapiro-Wilk normality p-value for each chain's burned-in, thinned sample.
tmp <- sapply(1:N, function(x) shapiro.test(mc[seq(burn.in, iterations, thinning), x])$p.value)
if (plots) {
hist(tmp, main = "Histogram of p-values of shapiro tests of generated samples")
}
print("test of uniformity of p-values of shapiro tests of generated samples")
# print("ks.test p_value")
print(ks.test(tmp, "punif"))
# return(mc[seq(burn.in, iterations, thinning), ])
}
# delta = 0.1: tiny random-walk steps, so nearly every proposal is
# accepted (~99%). Consecutive states are then strongly correlated and
# heavy thinning (every 50th draw) is needed; the KS test does not
# reject uniformity of the Shapiro p-values.
mcmc(delta = 0.1, thinning = 50, plots = TRUE)
## [1] "Rate of acceptance of changes"
## [1] 0.98686
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## [1] "test of uniformity of p-values of shapiro tests of generated samples"
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: tmp
## D = 0.11049, p-value = 0.1739
## alternative hypothesis: two-sided
# Default delta = 1: acceptance ~80%, moderate thinning suffices.
mcmc(thinning = 20, plots = TRUE)
## [1] "Rate of acceptance of changes"
## [1] 0.80298
## [1] "test of uniformity of p-values of shapiro tests of generated samples"
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: tmp
## D = 0.082161, p-value = 0.5094
## alternative hypothesis: two-sided
# delta = 10: steps are too large, only ~16% of proposals are accepted;
# the chain stalls between rare jumps and the KS test is borderline
# (p ~ 0.04).
mcmc(delta = 10, thinning = 20, plots = TRUE)
## [1] "Rate of acceptance of changes"
## [1] 0.15796
## [1] "test of uniformity of p-values of shapiro tests of generated samples"
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: tmp
## D = 0.13917, p-value = 0.04157
## alternative hypothesis: two-sided
Оптимальное значение дельты — около 3: при нём частота принятия (≈49%) близка к теоретически оптимальной для одномерного случайного блуждания (≈44% по результатам Roberts, Gelman и Gilks, 1997), то есть предложения принимаются достаточно часто, но при этом шаги не слишком малы.
# delta = 3: acceptance ~49%, close to the tuning target above. The KS
# test still rejects here, though — with thinning = 5 the retained draws
# remain noticeably autocorrelated.
mcmc(delta = 3, thinning = 5, plots = TRUE)
## [1] "Rate of acceptance of changes"
## [1] 0.48852
## [1] "test of uniformity of p-values of shapiro tests of generated samples"
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: tmp
## D = 0.20904, p-value = 0.0003202
## alternative hypothesis: two-sided
# Standard Cauchy increments: the proposal is symmetric about 0, so the
# dQ correction in step() cancels; heavy tails give occasional big jumps.
mcmc(rQ = rcauchy, dQ = dcauchy, plots = TRUE)
## [1] "Rate of acceptance of changes"
## [1] 0.53314
## [1] "test of uniformity of p-values of shapiro tests of generated samples"
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: tmp
## D = 0.10016, p-value = 0.2683
## alternative hypothesis: two-sided
# Laplace (double-exponential) increments, also symmetric.
# NOTE(review): rlaplace/dlaplace are not base R — presumably from
# rmutil or extraDistr; confirm which package is loaded.
mcmc(rQ = rlaplace, dQ = dlaplace, plots = TRUE)
## [1] "Rate of acceptance of changes"
## [1] 0.66488
## [1] "test of uniformity of p-values of shapiro tests of generated samples"
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: tmp
## D = 0.075286, p-value = 0.6224
## alternative hypothesis: two-sided
# Uniform proposal widened to delta = 6.5 so acceptance drops to ~25%.
mcmc(delta = 6.5, thinning = 20)
## [1] "Rate of acceptance of changes"
## [1] 0.24958
## [1] "test of uniformity of p-values of shapiro tests of generated samples"
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: tmp
## D = 0.065717, p-value = 0.7808
## alternative hypothesis: two-sided
# Cauchy proposal with scale 3.5, tuned to the same ~25% acceptance.
mcmc(rQ = function(x) rcauchy(x, scale = 3.5), dQ = function(x) dcauchy(x, scale = 3.5))
## [1] "Rate of acceptance of changes"
## [1] 0.24672
## [1] "test of uniformity of p-values of shapiro tests of generated samples"
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: tmp
## D = 0.12542, p-value = 0.08603
## alternative hypothesis: two-sided
# Laplace proposal with scale 5, likewise tuned to ~25% acceptance.
mcmc(rQ = function(x) rlaplace(x, s = 5), dQ = function(x) dlaplace(x, s = 5))
## [1] "Rate of acceptance of changes"
## [1] 0.25192
## [1] "test of uniformity of p-values of shapiro tests of generated samples"
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: tmp
## D = 0.13202, p-value = 0.06125
## alternative hypothesis: two-sided